# -*- coding: utf-8 -*-
#@Time : 2020/10/20 21:55
#@Author : 阳某
#@File : Pandas的Categorical数据类型可以降低数据存储提升计算速度.py
#@Software : PyCharm

import pandas as pd
# Load the users table into a DataFrame.
# The file has no header row, so the column names are supplied explicitly.
# The fields are delimited by the two-character token "::"; the C parser only
# supports single-character separators, hence engine="python".
df = pd.read_csv(
    "./datas/movielens-1m/users.dat",
    names="UserID::Gender::Age::Occupation::Zip-code".split("::"),
    header=None,
    sep="::",
    engine="python",
)

# 1. Inspect the raw data and its memory footprint.
print(df.head())

# DataFrame.info() writes its report straight to stdout and returns None,
# so it is called directly; wrapping it in print() would emit a stray "None".
df.info()

# memory_usage="deep" inspects object (string) columns to report their real
# memory consumption rather than a shallow estimate.
df.info(memory_usage="deep")

# Work on a copy so the original frame stays available for comparison.
df_cat = df.copy()
print(df_cat.head())

# 2. Use the categorical dtype to reduce memory usage.
# Converting the "Gender" column to "category" lets pandas store each distinct
# value once and keep compact integer codes per row.
df_cat["Gender"] = df_cat["Gender"].astype("category")

# info() prints its own report and returns None, so do not wrap it in print().
df_cat.info(memory_usage="deep")

print(df_cat.head())
print(df_cat["Gender"].value_counts())

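# 3. Speed up computation.
# The two lines below are IPython/Jupyter magics, not plain Python; run them in
# a notebook to compare groupby speed on the object vs. categorical column.
# %timeit df.groupby("Gender").size()
# %timeit df_cat.groupby("Gender").size()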
